- # rules for python
- # based on grammar given in Programming Python by Mark Lutz
-
- # EDIT THIS: THE DIRECTORY IN WHICH TO MARSHAL THE
- # GRAMMAR DATA STRUCTURES.
- #
- ARCHIVE = "."
-
- marshalfilename = ARCHIVE + "/pygram.mar"
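- # (the compiled grammar tables are cached in this marshal file; unMarshalpygram
- # below loads them and falls back to GrammarBuild only when the file is missing)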
-
- pyrules = """
-
- all ::
-
- ## input terminates with "fake" dedent (forces read of all file)
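- 
- ## notation reminder: each "@R <rulename> :: <lhs> >> <rhs symbols>" line below
- ## declares one production; an empty right-hand side (eg elifs0, params2) is an
- ## epsilon production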
-
- @R all1 :: all >> file_input DEDENT
-
- ## 1 term newline
-
- ##@R lead_blank :: file_input >> NEWLINE file_input
-
- @R top_stmt :: file_input >> file_input stmt
- @R file_input :: file_input >> stmt
-
-
- ## 2
- @R simple :: stmt >> simple_stmt
- @R compound :: stmt >> compound_stmt
-
- ## 3 punct ; term NEWLINE
- @R one_small :: simple_stmt >> small_stmt NEWLINE
- @R more_small :: simple_stmt >> small_stmt ; simple_stmt
- @R small_semi :: simple_stmt >> small_stmt ; NEWLINE
-
- ## 4 kw pass
- @R smexpr :: small_stmt >> expr_stmt
- @R smassn :: small_stmt >> assn
- @R smprint :: small_stmt >> print_stmt
- @R smdel :: small_stmt >> del_stmt
- @R smpass :: small_stmt >> pass
- @R smflow :: small_stmt >> flow_stmt
- @R smimport :: small_stmt >> import_stmt
- @R smglobal :: small_stmt >> global_stmt
- ## access ignored
- @R smexec :: small_stmt >> exec_stmt
-
- ## 5
- @R cmif :: compound_stmt >> if_stmt
- @R cmwhile :: compound_stmt >> while_stmt
- @R cmfor :: compound_stmt >> for_stmt
- @R cmtry :: compound_stmt >> try_stmt
- @R cmdef :: compound_stmt >> funcdef
- @R cmclass :: compound_stmt >> classdef
-
- ##6
- @R exprlist :: expr_stmt >> testlist
- ##@R assignment :: expr_stmt >> assn
- @R assn1 :: assn >> testlist = testlist
-
- @R assnn :: assn >> testlist = assn
-
- @R assn1c :: assn >> testlist , = testlist
-
- @R assn1c2 :: assn >> testlist , = testlist ,
-
- @R assnnc :: assn >> testlist , = assn
-
- ##testing @R exprassn :: expr_stmt >> expr_stmt = testlist
-
- @R exprlistc :: expr_stmt >> testlist ,
-
- ##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist ,
-
- ##7 kw print
- @R rprint0 :: print_stmt >> print
- @R rprint :: print_stmt >> print testlist
- @R rprintc :: print_stmt >> print testlist ,
-
- ##8 kw del
- @R rdel :: del_stmt >> del exprlist
-
- ##9 trivially handled in #4
-
- ##10 kw raise continue break return
-
- ## eliminates 11 12 13 14
- @R rbreak :: flow_stmt >> break
- @R rcontinue :: flow_stmt >> continue
- @R rreturn0 :: flow_stmt >> return
- @R rreturn :: flow_stmt >> return testlist
- @R rreturnc :: flow_stmt >> return testlist ,
- @R rraise1 :: flow_stmt >> raise test
- @R rraise2 :: flow_stmt >> raise test , test
- @R rraise3 :: flow_stmt >> raise test , test , test
-
- ## 11 12 13 14 skipped
-
- ## 15 kw import from
- @R rimport :: import_stmt >> import dotted_name_list
- @R rimportc :: import_stmt >> import dotted_name_list ,
- @R dnlist1 :: dotted_name_list >> dotted_name
- @R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name
- @R rfrom :: import_stmt >> from dotted_name import name_list
- @R rfroms :: import_stmt >> from dotted_name import *
- @R rfromc :: import_stmt >> from dotted_name import name_list ,
- @R nlistn :: name_list >> name_list , NAME
- @R nlist1 :: name_list >> NAME
-
- ##16 nt NAME
- @R dn1 :: dotted_name >> NAME
- @R dnn :: dotted_name >> dotted_name . NAME
-
- ##17 kw global
- @R global1 :: global_stmt >> global NAME
- @R globaln :: global_stmt >> global_stmt , NAME
-
- ## 18 19 ignored
-
- ##20 kw exec in
- @R exec1 :: exec_stmt >> exec expr
- @R exec2 :: exec_stmt >> exec expr in test
- @R exec3 :: exec_stmt >> exec expr in test , test
-
- ##21 kw if elif else punct :
- @R ifr :: if_stmt >> if test : suite elifs
- @R elifs0 :: elifs >>
- @R relse :: elifs >> else : suite
- @R elifsn :: elifs >> elif test : suite elifs
-
- ##22 kw while
- @R while1 :: while_stmt >>
- while test :
- suite
- @R while2 :: while_stmt >>
- while test :
- suite
- else :
- suite
-
- ##23 kw for
- @R for1 :: for_stmt >>
- for exprlist in testlist :
- suite
- @R for2 :: for_stmt >>
- for exprlist in testlist :
- suite
- else :
- suite
-
- ##24 kw try
- @R tryr :: try_stmt >> try : suite excepts
- @R excepts1 :: excepts >> except_clause : suite
- @R excepts2 :: excepts >> except_clause : suite else : suite
- @R exceptsn :: excepts >> except_clause : suite excepts
- @R tryf :: try_stmt >> try : suite finally : suite
-
- ##25 kw except
- @R except0 :: except_clause >> except
- @R except1 :: except_clause >> except test
- @R except2 :: except_clause >> except test , test
-
- ##26
- @R class1 :: classdef >> class NAME : suite
- @R class2 :: classdef >> class NAME ( testlist ) : suite
-
- ##27 kw def
- @R rdef :: funcdef >> def NAME parameters : suite
-
- ##28, 29 punct = *
-
- ## (modified from grammar presented)
- @R params1 :: parameters >> ( varargslist )
- @R params1c :: parameters >> ( varargslist , )
- @R params2 :: varargslist >>
-
- ## this is way too permissive: fix at semantic level
- @R params3 :: varargslist >> arg
- @R params4 :: varargslist >> varargslist , arg
- @R argd :: arg >> NAME = test
- @R arg2 :: arg >> fpdef
- @R arg3 :: arg >> * NAME
- @R arg4 :: arg >> ** NAME
-
- ## 30
- @R fpdef1 :: fpdef >> NAME
- @R fpdef2 :: fpdef >> ( fplist )
- @R fpdef2c :: fpdef >> ( fplist , )
-
- ##31
- @R fplist1 :: fplist >> fpdef
- @R fplistn :: fplist >> fplist , fpdef
-
- ##32 t INDENT DEDENT
- @R ssuite :: suite >> simple_stmt
- @R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT
- @R stmtseq1 :: stmtseq >> stmt
- @R stmtseqn :: stmtseq >> stmtseq stmt
-
- ##33 kw or cancels 53
- @R testor :: test >> or_test
- @R testand :: or_test >> and_test
- @R testor1 :: or_test >> or_test or and_test
- ## @R testlambda0 :: test >> lambda : test REDUNDANT
- @R testlambda1 :: test >> lambda varargslist : test
-
- ##34 kw and
- @R andnot :: and_test >> not_test
- @R andand :: and_test >> and_test and not_test
-
- ##35 kw not
- @R notnot :: not_test >> not not_test
- @R notcmp :: not_test >> comparison
-
- ##36 NOTE KWS == >= <= <> !=
- @R cmpexpr :: comparison >> expr
- @R cmplt :: comparison >> comparison < expr
- @R cmpgt :: comparison >> comparison > expr
- @R cmpeq :: comparison >> comparison == expr
- @R cmpge :: comparison >> comparison >= expr
- @R cmple :: comparison >> comparison <= expr
- @R cmpnep :: comparison >> comparison <> expr
- @R cmpne :: comparison >> comparison != expr
- @R cmpin :: comparison >> comparison in expr
- @R cmpnotin :: comparison >> comparison not in expr
- @R cmpis :: comparison >> comparison is expr
- @R cmpisnot :: comparison >> comparison is not expr
-
- ##37 kw is not punct > < ! (eliminated)
-
- ##38 p |
- @R expr_xor :: expr >> xor_expr
- @R expr_lor :: expr >> expr | xor_expr
-
- ##39 p ^
- @R xor_and :: xor_expr >> and_expr
- @R xor_xor :: xor_expr >> xor_expr ^ and_expr
-
- ##40
- @R and_shift :: and_expr >> shift_expr
- @R and_and :: and_expr >> and_expr & shift_expr
-
- ##41 note kw's << >x> (the goofy >x> stands in for >>, which would otherwise clash with the >> of the rule notation)
- @R shift_arith :: shift_expr >> arith_expr
- @R shift_left :: shift_expr >> shift_expr << arith_expr
- @R shift_right :: shift_expr >> shift_expr >x> arith_expr
-
- ##42
- @R arith_term :: arith_expr >> term
- @R arith_plus :: arith_expr >> arith_expr + term
- @R arith_minus :: arith_expr >> arith_expr - term
-
- ##43 p */%
- @R termfactor :: term >> factor
- @R termmul :: term >> term * factor
- @R termdiv :: term >> term / factor
- @R termmod :: term >> term % factor
-
- ## stuff for power
- @R factorpower :: factor >> power
- @R factorexp :: factor >> factor ** power
-
- ##44 p ~
- @R powera :: power >> atom trailerlist
- @R trailerlist0 :: trailerlist >>
- @R trailerlistn :: trailerlist >> trailer trailerlist
- @R powerp :: power >> + power
- @R powerm :: power >> - power
- @R poweri :: power >> ~ power
-
- ##45 t NUMBER STRING
- @R nulltup :: atom >> ( )
- @R parens :: atom >> ( testlist )
- @R parensc :: atom >> ( testlist , )
- @R nulllist :: atom >> [ ]
- @R list :: atom >> [ testlist ]
- @R listc :: atom >> [ testlist , ]
- @R nulldict :: atom >> { }
- @R dict :: atom >> { dictmaker }
- @R dictc :: atom >> { dictmaker , }
- @R repr :: atom >> ` testlist `
- ## @R reprc :: atom >> ` testlist , ` doesn't work, apparently
- @R aname :: atom >> NAME
- ## note number to be broken out into FLOAT OCTINT HEXINT INT
- @R anumber :: atom >> NUMBER
- @R astring :: atom >> stringseq
- @R stringseq0 :: stringseq >> STRING
- @R stringseqn :: stringseq >> stringseq STRING
-
- ##46
- @R nullcall :: trailer >> ( )
- @R call :: trailer >> ( arglist )
- @R callc :: trailer >> ( arglist , )
- @R index :: trailer >> [ subscriptdots ]
- @R getattr :: trailer >> . NAME
-
- ##47
- @R arg1 :: arglist >> argument
- @R argn :: arglist >> arglist , argument
- ##@R argn1 :: arglist >> arglist , NAME = test
-
- ##48 ( !!!! is this wrong in PP?)
-
- @R posarg :: argument >> test
-
- ## here the left test should always be a NAME, but the parser doesn't like it
- @R namearg :: argument >> test = test
-
- ##49 this IS wrong in PP (numeric ext)
- @R nodots :: subscriptdots >> subscriptseq
- @R yesdots :: subscriptdots >> subscriptseq , . . . , subscriptseq
- @R subscript1 :: subscriptseq >> subscript
- @R subscriptn :: subscriptseq >> subscriptseq , subscript
- @R subscriptt :: subscript >> test
- @R subscripts0 :: subscript >> :
- @R subscriptsL :: subscript >> test :
- @R subscriptsR :: subscript >> : test
- @R subscripts :: subscript >> test : test
-
- ##50
- @R exprlist1 :: exprlist >> expr
- @R exprlistn :: exprlist >> exprlist , expr
-
- ##51
- @R testlist0 :: testlist >> test
- @R testlistn :: testlist >> testlist , test
-
- ##52
- @R dictmaker1 :: dictmaker >> test : test
- @R dictmaker2 :: dictmaker >> dictmaker , test : test
-
- """
-
- nonterms = """
- subscriptdots subscript arg
- argument arglist subscriptseq params trailerlist
- factor atom trailer dictmaker stringseq power
- xor_expr and_expr shift_expr arith_expr term
- and_test or_test not_test comparison comp_op expr
- fplist stmtseq varargslist assn
- expr elifs suite excepts parameters pbasic pdefault pspecial
- testlist exprlist test dotted_name_list dotted_name name_list
- if_stmt while_stmt for_stmt try_stmt funcdef classdef
- expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt
- small_stmt compound_stmt stmt simple_stmt exec_stmt
- file_input except_clause fpdef cmp_op
- all
- """
-
- import string
- # python needs special handling for the lexical stuff
- NAMEre = "[" + string.letters + "_][" + string.letters+string.digits +"]*"
- NUMBERre = "[" + string.digits + "]+" # temporary!
- STRINGre = '"[^"\n]*"' # to be overridden in lexdict
- #NEWLINEre = "\n" # to be overridden in lexdict
- INDENTre = "#" # a fake! to be overridden
- DEDENTre = "#" # a fake! to be overridden
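- # (these res are only rough placeholders: the pylexdict class below does the
- # real tokenizing of strings, indents and dedents by hand)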
-
- def echo(str):
- return str
-
- def DeclareTerminals(Grammar):
- Grammar.Addterm("NAME", NAMEre, echo)
- Grammar.Addterm("NUMBER", NUMBERre, echo)
- Grammar.Addterm("STRING", STRINGre, echo)
- #Grammar.Addterm("NEWLINE", NEWLINEre, echo) # newline is kw!
- Grammar.Addterm("INDENT", INDENTre, echo)
- Grammar.Addterm("DEDENT", DEDENTre, echo)
-
- # note: >x> is a fake!
- keywords = """
- and break class continue def del elif else except exec
- finally for from global if import in is lambda not or pass
- print raise return try while == >= <= <> != >x> << NEWLINE
- **
- """
-
- import kjParser, string, regex
- from kjParser import KEYFLAG, ENDOFFILETERM
-
- alphanumunder = string.letters+string.digits+"_"
- alpha = string.letters + "_"
-
- # characters that may form part of an identifier (cannot appear next to a kw).
- id_letters = map(None, alphanumunder)
-
- # terminator re for names
- nametermre = "[^" + alphanumunder + "]"
- nameterm = regex.compile(nametermre)
-
- # terminator re for numbers (same as above but allow "." in num).
- numtermre = "[^" + alphanumunder + "\.]"
- numterm = regex.compile(numtermre)
-
- parseerror = "parseerror"
-
- pycommentre = "\(#.*\)"
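- # (old "regex" module syntax: \( ... \) delimits a group, so this matches a
- # "#" comment up to, but not including, the terminating newline)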
-
- # whitespace regex outside of brackets
- # white followed by (comment\n maybe repeated)
- # DON'T EAT NEWLINE!!
- pywhiteoutre = "\([ \t\r\014]\|\\\\\n\)*%s?" % pycommentre
- pywhiteout = regex.compile(pywhiteoutre)
-
- # whitespace regex inside brackets
- # white or newline possibly followed by comment, all maybe repeated
- pywhiteinre = pywhiteoutre #"[ \t\r]*\(\\\\\n\)*%s?" % pycommentre
- pywhitein = regex.compile(pywhiteinre)
-
- # totally blank lines (only recognize if next char is newline)
- #allblankre = "\n" + pywhiteinre
- #allblank = regex.compile(allblankre)
-
- # re for indentation (might accept empty string)
- indentp = regex.compile("[\t ]*")
-
- # two char kws and puncts
- char2kw = ["if", "or", "in", "is"]
- punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="]
-
- # longer (>two char) kws, as a map from their first 3 chars to the full keyword
- char3k_data = """
- and break class continue def del elif else except
- finally for from global import lambda not pass print
- raise return try while exec
- """
-
- char3kw = string.split(char3k_data)
- char3kwdict = {}
- for x in char3kw:
- char3kwdict[x[:3]] = x
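- # eg char3kwdict["whi"] == "while", char3kwdict["con"] == "continue";
- # three-letter keywords such as "def" map to themselves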
-
- # NOTE: newline is treated the same as punctuation
- # NOTE: "' ARE NOT PUNCTS
- punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`"
- punctlist = map(None, punct)
-
- kwmap = {}
- for x in char2kw + punct2 + char3kw + map(None, punct):
- # everything parses as length 1 to the outer world.
- kwmap[x] = (((KEYFLAG, x), x), 1)
-
- # special hack
- kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1)
- newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)
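- # (every kwmap entry has the shape (((KEYFLAG, name), name), 1): the keyword
- # token paired with its reported token length of 1)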
-
- #finaldedent = (((TERMFLAG, "DEDENT"), ""), 1)
-
- # Python lexical dictionary.
-
- ### MUST HANDLE WHOLLY BLANK LINES CORRECTLY!
-
- class pylexdict(kjParser.LexDictionary):
- def __init__(self):
- kjParser.LexDictionary.__init__(self)
- # need to add special map for >>
- self.brackets = 0 # count of active brackets
- self.realindex = 0 # where to start
- self.indents = [""] # stack of indents (start with a fake one)
- self.lineno = 0
- self.atdedent = 0
- ### handle multiple dedents correctly!!!
- ### translate tabs to 8 spaces...
- from kjParser import TERMFLAG
- self.NAMEflag = (TERMFLAG, "NAME")
- self.STRINGflag = (TERMFLAG, "STRING")
- self.NEWLINEflag = (TERMFLAG, "NEWLINE")
- self.INDENTflag = (TERMFLAG, "INDENT")
- self.DEDENTflag = (TERMFLAG, "DEDENT")
- self.NUMBERflag = (TERMFLAG, "NUMBER")
-
- def endoffile(self, String):
- # pop off all indentations!
- indents = self.indents
- #lastresult = self.lastresult
- self.realindex = len(String)
- if not indents:
- # pop indents
- #print "eof after dedent"
- result = self.lastresult = (ENDOFFILETERM, 0)
- else:
- #print "eof as dedent after", self.lastresult
- del indents[-1]
- if indents:
- dedent = indents[-1]
- else:
- dedent = ""
- result = self.lastresult = ((self.DEDENTflag, dedent), 1)
- #print "returning eof", result, "after", lastresult
- return result
-
- def Token(self, String, StartPosition):
- #print "Token", (StartPosition,
- # `String[self.realindex:self.realindex+20]`, self.lastresult)
- # HAVE TO FAKE OUT LEXER FOR DEDENTS
- # STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION
- # STRING POSITION IS MAINTAINED IN LexDict object.
- lastindex = self.lastindex
- lastresult = self.lastresult
- if self.laststring is not String:
- #print "parsing new string"
- self.laststring = String
- # special hack: skip lead whitespace
- cursor = 0
- self.lineno = 1
- while 1:
- test = pywhitein.match(String, cursor)
- if test<0: break
- next = cursor + test
- #print "lead skip:", next, String[cursor:next]
- if String[next]!="\n": break
- #skipped = String[cursor:next]
- #if "\n" in skipped:
- # self.lineno = (
- # self.lineno + len(string.splitfields(skipped, "\n")))
- #self.lineno = self.lineno+1
- cursor = next + 1
- self.realindex = cursor
- self.saveindex = 0
- self.indents = [""] # stack of indents (start with a fake one)
- # pretend we saw a newline
- self.lastresult = newlineresult
- if StartPosition!=0:
- self.laststring = None
- raise ValueError, "python lexical parsing must start at zero"
- lastindex = self.lastindex
- lastresult = None
- elif lastindex == StartPosition:
- #print "returning lastresult ", lastresult
- return lastresult
- elif lastindex != StartPosition-1:
- raise ValueError, "python lexer can't skip tokens"
-
- #print "parsing", StartPosition, lastresult
- # do newline counting here!
- delta = String[self.saveindex: self.realindex]
- #print "delta", `delta`
- if "\n" in delta:
- #print self.lineno, self.saveindex, self.realindex, `delta`
- self.lineno = self.lineno + len(
- string.splitfields(delta, "\n")) - 1
- realindex = self.saveindex = self.realindex
- self.lastindex = StartPosition
-
- # skip whitespace (including comments)
- ### needs to be improved to parse blank lines, count line numbers...
- # skip all totally blank lines (don't eat last newline)
- atlineend = (String[realindex:realindex+1] == "\n"
- or lastresult is newlineresult
- or self.atdedent)
- skipnewlines = (lastresult is newlineresult or
- self.atdedent or
- self.brackets>0)
- if atlineend: #String[realindex:realindex+1]=="\n":
- #print "trying to skip blank lines", String[realindex:realindex+10]
- while 1:
- #if String[realindex:realindex+1]=="\n":
- # start = realindex+1 # move past current newline
- # self.lineno = self.lineno + 1
- #else:
- # start = realindex
- start = realindex
- if skipnewlines:
- while String[start:start+1]=="\n":
- start = start+1
- #self.lineno = self.lineno+1
- #print "matching", `String[start:start+10]`
- skip = pywhitein.match(String, start)
- #print "skip=", skip
- if skip<0: break
- rs = skip + realindex + (start-realindex)
- if rs==realindex: break
- #print "at", rs, `String[rs]`
- if (rs<len(String) and
- (String[rs] == "\n" or
- (skipnewlines and String[rs-1:rs]=="\n"))):
- #print "skipping blank line"
- #if lastresult is newlineresult or self.brackets>0:
- # rs = rs + 1
- #skipped = String[start:rs]
- #if "\n" in skipped:
- #self.lineno = self.lineno + len(
- # string.splitfields(skipped, "\n"))
- self.realindex = realindex = rs
- #self.lineno = self.lineno+1
- else:
- if skipnewlines: self.realindex = realindex = start
- break
- #print "after skipping blank lines", `String[realindex:realindex+20]`
- skipto = realindex
- skip = 0
- if self.brackets>0:
- while 1:
- #print "skipping white in brackets", skipto
- if skipto>=len(String):
- break
- if String[skipto]=="\n":
- #self.lineno = self.lineno+1
- skipto = skipto + 1
- self.realindex = realindex = skipto
- continue
- skip = pywhiteout.match(String, skipto)
- nextskipto = skipto+skip
- #skipped = String[skipto:nextskipto]
- #if "\n" in skipped:
- # self.lineno = self.lineno+len(
- # string.splitfields(skipped, "\n"))
- if skip>0:
- skipto = nextskipto
- else: break
- skip = skipto - realindex
- elif not atlineend:
- skip = pywhitein.match(String, realindex)
- if skip<=0:
- skip = 0
- else:
- #print "skipping", skip
- nextri = realindex + skip
- #skipped = String[realindex:nextri]
- #if "\n" in skipped:
- # self.lineno = self.lineno + len(
- # string.splitfields(skipped, "\n"))
- realindex = self.realindex = nextri
- if realindex>=len(String):
- return self.endoffile(String)
- # now look for a keyword, name, number, punctuation,
- # INDENT, DEDENT, NEWLINE
- first = String[realindex]
- #if last parse was newline and not in brackets:
- # look for indent/dedent
- if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
- and first != "\n"):
- #print "looking for dent", realindex, `String[realindex:realindex+20]`
- match = indentp.match(String, realindex)
- if match>=0:
- dent = String[realindex: realindex+match]
- #print "dent match", match, `dent`
- oldindex = realindex
- self.realindex = realindex = realindex+match
- # replace tabs with 8 spaces
- dent = string.joinfields(string.splitfields(dent, "\t"),
- " ")
- dents = self.indents
- lastdent = dents[-1]
- ldl = len(lastdent)
- dl = len(dent)
- #print "last", ldl, dents
- if ldl<dl:
- self.atdedent = 0
- result = self.lastresult = ((self.INDENTflag, dent), 1)
- dents.append(dent)
- #print "indent ", result, dents
- return result
- if ldl>dl:
- self.realindex = oldindex # back up, may have to see it again!
- self.atdedent = 1
- result = self.lastresult = ((self.DEDENTflag, dent), 1)
- del dents[-1]
- #print "dedent ", result, dl, dents
- return result
- # otherwise, indentation is same, keep looking
- # might be at eof now:
- if realindex>=len(String):
- #print "returning eof"
- return self.endoffile(String)
- first = String[realindex]
- self.atdedent = 0
- from string import digits #, letters
- if (first in punctlist and
- # special case for .123 numbers (yuck!)
- (first!="." or String[realindex+1] not in digits)):
- # is it a 2 char punct?
- first2 = String[realindex:realindex+2]
- if first2 in punct2:
- result = self.lastresult = kwmap[first2]
- self.realindex = realindex+2
- #print "2 digit punct", result
- return result
- # otherwise, just return normal punct
- result = self.lastresult = kwmap[first]
- self.realindex = self.realindex + 1
- ### special bookkeeping
- if first=="\n":
- result = newlineresult
- #print "newline!"
- #self.lineno = self.lineno+1
- elif first in "[{(":
- #print "bracket!"
- self.brackets = self.brackets + 1
- elif first in "]})":
- #print "close bracket!"
- self.brackets = self.brackets - 1
- #print "1 digit punct", result
- return result
- if first in digits or first==".":
- # parse a number...
- skip = numterm.search(String, realindex)
- if skip<=realindex:
- raise parseerror, "number length<1 (!)"
- thenumber = String[realindex:skip]
- self.realindex = skip
- ### note don't interpret number here!!
- result = self.lastresult = ((self.NUMBERflag, thenumber), 1)
- #print "number", result
- return result
- if first in alpha:
- # try keyword...
- first2 = String[realindex: realindex+2]
- if first2 in char2kw:
- if String[realindex+2:realindex+3] not in id_letters:
- # parse a 2 char kw first2
- result = self.lastresult = kwmap[first2]
- self.realindex = self.realindex+2
- #print "keyword 2", result
- return result
- first3 = String[realindex: realindex+3]
- if char3kwdict.has_key(first3):
- the_kw = char3kwdict[first3]
- the_end = realindex+len(the_kw)
- if ((the_end<len(String)) and
- (String[the_end] not in id_letters) and
- (String[realindex:the_end]==the_kw)):
- # parse the_kw
- self.realindex = the_end
- result = self.lastresult = kwmap[the_kw]
- #print "keyword +", result
- return result
- #otherwise parse an identifier
- #print "looking for name:", `String[realindex:realindex+10]`
- skip = nameterm.search(String, realindex)
- if skip<=realindex:
- raise parseerror, "identifier length<1 (!)"
- theid = String[realindex:skip]
- self.realindex = skip
- ### note don't interpret the identifier here!!
- result = self.lastresult = ((self.NAMEflag, theid), 1)
- #print "id", result
- return result
- if first in "\"'":
- # check for triplequotes
- first3 = first*3
- if String[realindex: realindex+3] == first3:
- # parse triple quotes
- start = place = realindex+3
- while 1:
- last = string.find(String, first3, place)
- if last<0:
- raise parseerror, "failed to terminate triple quotes"
- if String[last-1:last]=="\\" and String[last-2:last-1]!="\\":
- place = last+1
- else: break
- the_string = String[start: last]
- self.realindex = last+3
- result = self.lastresult = ((self.STRINGflag, the_string), 1)
- #print "3q string", result
- # count the newlines!
- #newlinecount = len(string.splitfields(the_string, "\n"))
- #self.lineno = self.lineno+newlinecount
- #print "triple quotes", result
- return result
- else:
- # parse single quotes
- sanity = start = place = realindex+1
- done = 0
- while 1:
- sanity = min(string.find(String, "\n", sanity), len(String))
- if sanity<start:
- sanity=len(String)
- break
- if String[sanity-1]!="\\":
- break
- else:
- #self.lineno = self.lineno+1
- sanity = sanity + 1
- while 1:
- last = string.find(String, first, place)
- if last<0 or last>sanity:
- raise parseerror, "failed to terminate single quotes"
- if String[last-1:last]=="\\":
- # are we at the end of an odd number of backslashes? (yuck!)
- bplace = last-1
- while String[bplace:bplace+1]=="\\":
- bplace = bplace-1
- if (last-bplace)%2==1:
- break # the end quote is real!
- place = last+1
- else: break
- the_string = String[start:last]
- self.realindex = last+1
- result = self.lastresult = ((self.STRINGflag, the_string), 1)
- #print "1q string", result
- return result
- #print (String[realindex-20:realindex-1], String[realindex],
- # String[realindex+1:realindex+20])
- raise parseerror, "invalid first: " + `first`
-
- # use a modified lexstringwalker
- class pylexstringwalker(kjParser.LexStringWalker):
- def DUMP(self):
- kjParser.DumpStringWindow(self.String, self.LexDict.realindex)
-
- ## a HORRIBLE HACK! of a hack: override the DoParse of Grammar
- ## to give Python line numbers. RELIES ON GLOBAL pyg
- ##
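- ## (the wrapper below just re-raises whatever the parse raises, with the
- ## lexer's current line number attached; the stock DoParse reports no
- ## line numbers)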
- def hackDoParse(String, Context=None, DoReductions=1):
- import sys, kjParser
- try:
- # construct the ParserObj
- # add a newline to front to avoid problem with leading comment
- #String = "\n%s\n" % String
- Stream = pylexstringwalker( String, pyg.LexD )
- Stack = [] # {-1:0} #Walkers.SimpleStack()
- ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \
- DoReductions, Context )
- # do the parse
- ParseResult = ParseOb.GO()
- # return final result of reduction and the context
- return (ParseResult[1], Context)
- #return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions)
- except: ### for testing!!
- t, v = sys.exc_type, sys.exc_value
- v = ("near line", pyg.LexD.lineno, v)
- raise t, v
-
- buildinfo = """
- Please edit the ARCHIVE parameter of this module (%s)
- to place the python grammar archive in a standard
- directory to prevent the module from rebuilding
- the python grammar over and over and over...
- """ % __name__
-
- def GrammarBuild():
- global pyg
- import kjParseBuild
- pyg = kjParseBuild.NullCGrammar()
- pyg.DoParse = hackDoParse
- # override lexical dict here
- pyg.LexD = pylexdict()
- DeclareTerminals(pyg)
- pyg.Keywords(keywords)
- pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}")
- pyg.Nonterms(nonterms)
- pyg.Declarerules(pyrules)
- print buildinfo
- print "compiling... this may take a while..."
- pyg.Compile()
- print "dumping"
- outfile = open(marshalfilename, "wb")
- pyg.MarshalDump(outfile)
- outfile.close()
- print "self testing the grammar"
- test(pyg)
- print "\n\ndone with regeneration"
- return pyg
-
- def unMarshalpygram():
- global pyg
- import kjParser
- print "loading"
- try:
- infile = open(marshalfilename, "rb")
- except IOError:
- print marshalfilename, "not found, attempting creation"
- pyg = GrammarBuild()
- else:
- pyg = kjParser.UnMarshalGram(infile)
- infile.close()
- pyg.DoParse = hackDoParse
- # lexical override
- pyg.LexD = pylexdict()
- DeclareTerminals(pyg)
- # BindRules(pyg)
- if dotest:
- print "self testing the grammar"
- test(pyg)
- return pyg
-
-
- # not used, commented
- #### interpretation rules/classes
- #
- #def zeroth(list, Context):
- # return list[0] # eg, for all1, ignore all but first
- #
- ## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob
- #def append(list, Context):
- # "eg, for top_stmt, conjoin two smt lists"
- # return list[0] + list[1]
- #
- ## file_input >zeroth
- #
- ## simple, compound, one_small, small_semi: echol
- #def echol(list, Context):
- # return list
- #
- ## more_small > seq_sep
- #def seq_sep(list, Context):
- # list[0].append(list[2])
- # return list[0]
- #
- ## smexpr, smassn, smprint, smdel, smflow, smimport, smglobal, smexec
- ## > zeroth
- #
- ## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass > zeroth
- #
- #
- #def BindRules(pyg):
- # for name in string.split("""
- # all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass
- # smexpr smassn smprint smdel smflow smimport smglobal smexec
- # """):
- # pyg.Bind(name, zeroth)
- # for name in string.split("""
- # simple compound one_small small_semi
- # """):
- # pyg.Bind(name, echol)
- # pyg.Bind("top_stmt", append)
- # pyg.Bind("more_small", seq_sep)
-
- teststring = """#
- #
- # a test string
- #
- from string import join, split
- '''
- import regex
-
- for a in l:
- a.attr, a[x], b = c
- else:
- d = b
- '''
- class zzz:
- '''
- #doc string
- '''
- '''
- global regex, join
-
- d = {}
- for i in range(10): d[i] = i
- '''
- def test(c,s):
- return "this"
- while not done:
- print done
- break
- list = [1,2,3]
- # comment
- return 5
-
-
- n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8
-
- if x==5:
- while y:
- for i in range(6):
- raise SystemError, "oops"
-
-
- """
-
- #teststring ="""\
- ## comment
- #if x in y: print z
- #elif 1: print w
- #"""
-
- '''
- teststring="""
- exec "print 1"
- """
- '''
-
- def test(grammar, context=None, teststring=teststring):
- from time import time
- now = time()
- x = grammar.DoParse1(teststring, context)
- elapsed = time()-now
- print x
- print elapsed
- return x
-
- regen = 0
- dotest = 0
-
- if __name__ == "__main__" :
- if regen: GrammarBuild()
- unMarshalpygram()
-
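- 
- # A minimal usage sketch (not part of the archive's workflow; assumes this file
- # is saved as pygram.py and that the ARCHIVE directory is writable):
- #
- #   import pygram
- #   g = pygram.unMarshalpygram()                 # load cached tables, or build them
- #   result = g.DoParse1("print 'hello'\n", None) # parse a newline-terminated source string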